RT ANALYSIS

####THIS CHUNK WILL NEED TO BE ADJUSTED TO READ FROM GITHUB DATA###
#THIS CHUNK MAKES MORE SENSE TO MOVE AFTER FUNCTIONS ARE GENERATED
#IT IS LOCATED HERE FOR VISIBILITY ONLY, ONCE MOVED TO GITHUB, RELOCATE TO A MORE LOGICAL
#LOCATION IN THE CODE AS DESIRED
#print the current working directory; all paths below are relative to it
getwd()
## [1] "C:/Users/jeffb/Desktop/Life/personal-projects/COVID"
#read in full DSHS data with county and TSA information
#NOTE(review): per the header note, these reads are to be replaced with the
#GitHub data source -- confirm paths when relocating this chunk
full.dshs<-read.csv("combined-datasets/county.csv")#change path as needed
tsa.dshs<-read.csv("combined-datasets/tsa.csv")#change path as needed
#rt.data.clean: prepare a DSHS data frame (county- or TSA-level) for Rt
#estimation. Keeps only the columns needed downstream, coerces the Date and
#Cases_Daily types, and clamps negative daily counts to zero.
rt.data.clean<-function(covid.data){

  library(dplyr)
  library(tidyr)
  library(reshape2)

  #columns retained when present in the input data set
  wanted.cols<-c("County",          #name of Texas county
                 "Date",            #date of record
                 "TSA_Name",        #name of Texas TSA group for county
                 "TSA",
                 "Metro_Area",      #metro/non-metro indicator for county
                 "Cases_Daily",     #daily new cases
                 "Population_DSHS") #county population per DSHS
                                    #Note: based on 2010 Census projections
                                    #as provided by the Census Bureau
  covid.data<-covid.data[,names(covid.data)%in%wanted.cols]

  #coerce the types expected by the Rt pipeline
  covid.data$Date<-as.Date(covid.data$Date)
  covid.data$Cases_Daily<-as.numeric(covid.data$Cases_Daily)

  #negative daily counts are reporting corrections, not real data; they would
  #distort Rt estimates, so floor them at zero (NA values pass through as NA)
  covid.data<-covid.data%>%mutate(Cases_Daily=ifelse(Cases_Daily<0, 0, Cases_Daily))
  return(covid.data)
}
#rt.data.organize: split cleaned COVID data into county, metro, TSA and state
#data frames suitable for Rt estimation.
#mycounty: county-level DSHS data; mytsa: TSA-level DSHS data.
#Returns a named list with elements county, TSA, state and metro.
rt.data.organize<-function(mycounty, mytsa){
  library(dplyr)
  library(tidyr)
  #clean both inputs exactly once up front
  cleaned.county<-rt.data.clean(mycounty)
  cleaned.tsa<-rt.data.clean(mytsa)

  #metro.df: aggregate county rows by metro area and date
  metro.temp<-cleaned.county%>%
    group_by(Metro_Area,Date)%>%
    mutate(metro_cases_daily=sum(Cases_Daily, na.rm=TRUE))%>%#daily cases summed within metro area
    mutate(metro_pop_DSHS=sum(Population_DSHS, na.rm=TRUE))%>%#metro population from county populations
    dplyr::select(Date,Metro_Area, metro_cases_daily, metro_pop_DSHS)%>%#select only variables of interest
    distinct()#keep only distinct rows (drops repeats now that values are metro-level)
  metro.df<-data.frame(Date=metro.temp$Date,
                       Metro_Area=metro.temp$Metro_Area,
                       Cases_Daily=metro.temp$metro_cases_daily,
                       Population_DSHS=metro.temp$metro_pop_DSHS)
  #county.df: drop the TSA/metro identifiers from the county data
  county.df<-cleaned.county%>%dplyr::select(-TSA_Name, -Metro_Area)

  #TSA.df: reuse the already-cleaned TSA data
  #(FIX: previously rt.data.clean(mytsa) was redundantly called a second time)
  TSA.df<-cleaned.tsa

  #state.df: aggregate TSA rows by date to get statewide daily cases and population
  state.temp<-cleaned.tsa%>%
    group_by(Date)%>%
    mutate(state_cases_daily=sum(Cases_Daily, na.rm=TRUE))%>%#statewide daily cases
    mutate(state_pop_DSHS=sum(Population_DSHS, na.rm=TRUE))%>%#statewide population
    dplyr::select(Date,state_cases_daily, state_pop_DSHS)%>%#select only variables of interest
    distinct()#keep only distinct rows (drops repeats now that values are state-level)
  state.df<-data.frame(Date=state.temp$Date, 
                       Cases_Daily=state.temp$state_cases_daily,
                       Population_DSHS=state.temp$state_pop_DSHS)
  #return the four dataframes generated
  return(list(county=county.df, TSA=TSA.df, state=state.df, metro=metro.df))
}
#rt.df.extraction: flatten the TD (time-dependent) estimates from an
#R0::estimate.R result into a data frame with columns Date, Rt, lower, upper.
rt.df.extraction<-function(Rt.estimate.output){

  td<-Rt.estimate.output$estimates$TD

  #stack() turns the named Rt series into (values, ind); reorder and rename so
  #the names become the Date column and the values become Rt
  rt.df<-setNames(stack(td$R)[2:1], c('Date', 'Rt'))

  #flatten the 95% confidence bounds (list-like) into plain vectors and attach
  rt.df$lower<-unlist(td$conf.int$lower, recursive = TRUE, use.names = TRUE)
  rt.df$upper<-unlist(td$conf.int$upper, recursive = TRUE, use.names = TRUE)

  #dates came through as names/factor levels; convert to Date class
  rt.df$Date = as.Date(rt.df$Date)

  return(rt.df)
}
#############################
#Remove this code later, for testing purposes only
#creating test county
#testcounty.df<-county.daily%>%subset(County=="Archer")
#testcounty.rt<-covid.rt(testcounty.df)
  #mydata<-testcounty.df

#Test the covid.function on TSA subset
#  #TSA.G<-TSA.daily%>%subset(TSA_Name=="Panhandle RAC")

###############################
#covid.rt: estimate the time-dependent reproduction number (Rt) for one
#geography's daily-case series using R0::estimate.R (method "TD").
#mydata must contain Date, Cases_Daily and Population_DSHS columns.
#Returns a data frame (Date, Rt, lower, upper) restricted to dates after
#2020-03-09; all-NA estimates are returned when the series is unusable.
covid.rt<-function(mydata){
library(R0)
  #treat missing daily counts as zero cases
  mydata<-mydata%>%replace(is.na(.),0)
  #total cases: series with <=50 total cases are too sparse for TD estimation
  sum.daily.cases<-sum(mydata$Cases_Daily)

  #named numeric vector of daily cases (names = dates), the input format
  #required by R0::estimate.R()
  mydata.new<-pull(mydata, Cases_Daily)
  date.vector<-pull(mydata, Date)
  names(mydata.new)<-c(date.vector)

  #generation time distribution
  ##### NOTE: THIS MAY NEED MODIFICATION ONCE MORE INFORMATION IS AVAILABLE ####
  #gen.time<-generation.time("lognormal", c(4.0, 2.9))#verify these values and distribution/update as needed
  #gen.time<-generation.time("lognormal", c(4.7,2.9)) #Nishiura
  #Tapiwa, Ganyani "Estimating the gen interval for Covid-19":
  # gen.time<-generation.time("gamma", c(5.2, 1.72)) #Singapore
  #gen.time<-generation.time("gamma", c(3.95, 1.51)) #Tianjin
  gen.time<-generation.time("gamma", c(3.96, 4.75))

  #population served (DSHS estimate); assumes Population_DSHS is constant
  #within mydata -- TODO confirm for grouped inputs
  pop.DSHS<-mydata$Population_DSHS[1]

  #estimation window: start at the later of 2020-03-09 and the first nonzero
  #daily count; end at the last nonzero daily count
  march9.row<-which(mydata$Date=="2020-03-09")
  first.nonzero<-min(which(mydata$Cases_Daily>0))
  last.nonzero<-max(which(mydata$Cases_Daily>0))
  minrow<-max(march9.row,first.nonzero)
  #BUG FIX: previously j<-as.integer(min(last.nonzero, max.date)) compared a
  #row index with a Date's numeric value (~18000+), so it always resolved to
  #last.nonzero anyway; state that directly.
  j<-last.nonzero

  #Guard: the old is.na(minrow) check could never fire -- an empty which()
  #propagates Inf/-Inf through min()/max(), never NA -- and its NA frame used
  #rep(NA, j), which breaks data.frame() when j != nrow. Use one guarded
  #if/else so rt.DSHS.df is always defined and correctly sized.
  if(!is.finite(minrow) || sum.daily.cases<=50){
    k<-length(mydata$Date)
    rt.DSHS.df<-data.frame(Date=as.Date(mydata$Date), Rt=rep(NA, k), lower=rep(NA, k), upper=rep(NA, k))
  } else {
    i<-minrow
    #restrict the series to the estimation window
    mydata.newest<-mydata.new[i:j]

    rt.DSHS<-estimate.R(mydata.newest, 
                        gen.time, 
                        #begin=as.integer(1), 
                        #end=total.days, 
                        methods=c("TD"), 
                        pop.size=pop.DSHS,
                        nsim=1000)

    rt.DSHS.df<-rt.df.extraction(rt.DSHS)
  }

  #drop the 2020-03-09 seed date itself from the output
  rt.out = subset(rt.DSHS.df, Date > as.Date('2020-03-09'))
  return(rt.out)
}

County

Metro

TSA

#run covid.rt function on state level data 
#(this will be a data frame output)
#NOTE(review): state.daily is built in chunks not shown here; later sections
#use state.Daily (capital D) -- confirm which object name is current
state.rt.df<-covid.rt(state.daily)
## Warning in est.R0.TD(epid = c(`2020-03-09` = 7, `2020-03-10` = 3, `2020-03-11` =
## 3, : Simulations may take several minutes.
## Warning in est.R0.TD(epid = c(`2020-03-09` = 7, `2020-03-10` = 3, `2020-03-11` =
## 3, : Using initial incidence as initial number of cases.
########## KABLE TABLES CAN BE COMMENTED OUT WHEN RUN IN GIT ##############
# library(kableExtra)
# county.rt.df[1:25,]%>%kable(caption="Texas County Level RT Estimates")%>%kable_styling(full_width=FALSE)
# metro.rt.df[1:25,]%>%kable(caption="Texas Metro Level RT Estimates")%>%kable_styling(full_width=FALSE)
# TSA.rt.df[1:25,]%>%kable(caption="Texas TSA RT Estimates")%>%kable_styling(full_width = FALSE)
# state.rt.df[1:25,]%>%kable(caption="Texas State RT Estimates")%>%kable_styling(full_width=FALSE)

#NOTE: FILE DESTINATION WILL NEED TO BE UPDATED FOR EACH OF THESE TO 
  #MATCH DIRECTORY IN YOUR WORKING DIRECTORY

#write the Rt estimates for each geographic level to CSV
#(county.rt.df, metro.rt.df and TSA.rt.df are produced in chunks not shown here)
write.csv(county.rt.df, file="statistical-output/rt/county_rt.csv", row.names=FALSE)
write.csv(metro.rt.df, file="statistical-output/rt/metro_rt.csv", row.names=FALSE)
write.csv(TSA.rt.df, file="statistical-output/rt/tsa_rt.csv", row.names=FALSE)
write.csv(state.rt.df, file="statistical-output/rt/state_rt.csv", row.names=FALSE)

TIME SERIES

#ts.data.clean: prepare a DSHS data frame (county- or TSA-level) for the
#time-series analysis. Keeps the needed columns, coerces the Date and
#Cases_Daily types, and clamps negative daily counts to zero.
ts.data.clean<-function(covid.data) {

  library(dplyr)
  library(tidyr)
  library(reshape2)

  #columns retained when present in the input data set
  wanted.cols<-c("County",            #name of Texas county
                 "Date",              #date of record
                 "TSA",
                 "TSA_Name",          #name of Texas TSA group for county
                 "Metro_Area",        #metro/non-metro indicator for county
                 "Cases_Daily",       #daily new cases
                 "Cases_Cumulative",
                 "Population_DSHS")   #population per DSHS data
                                      #Note: based on 2010 Census projections
                                      #as provided by the Census Bureau
  covid.data<-covid.data[,names(covid.data)%in%wanted.cols]

  #coerce the types expected downstream
  covid.data$Date<-as.Date(covid.data$Date)
  covid.data$Cases_Daily<-as.numeric(covid.data$Cases_Daily)

  #negative daily counts are reporting corrections; floor them at zero
  covid.data<-covid.data%>%mutate(Cases_Daily=ifelse(Cases_Daily<0, 0, Cases_Daily))
  return(covid.data)

}
#ts.data.organize: split the cleaned data into county, metro, TSA and state
#data frames, each tagged with Level / Level_Name columns for the
#time-series code. mycounty: county-level DSHS data; mytsa: TSA-level data.
#Returns a named list with elements county, TSA, state and metro.
ts.data.organize<-function(mycounty, mytsa){
  library(dplyr)
  library(tidyr)
  
  ### COUNTY ###
  
  #call the ts.data.clean function on mycounty
  cleaned.county<-ts.data.clean(mycounty)
  
  # drop NA dates and rows before reporting began (2020-03-04)
  cleaned.county<-subset(cleaned.county, !is.na(Date) & Date>=as.Date('2020-03-04'))

  #Drop TSA_Name and Metro_Area from county data frame
  county.df<-cleaned.county%>%dplyr::select(-TSA_Name, -Metro_Area)
  county.df$Level = 'County'
  #NOTE(review): positional rename assumes County is the first column after
  #the select above -- fragile if ts.data.clean changes column order
  colnames(county.df)[1] = 'Level_Name'
  ### METRO ###

  #create metro.df by grouping county data by Metro_Area and date
  metro.temp<-cleaned.county%>%
    group_by(Metro_Area,Date)%>%
    mutate(metro_Cases_Daily=sum(Cases_Daily, na.rm=TRUE))%>%#daily cases summed within metro area
    mutate(metro_Cases_Cumulative=sum(Cases_Cumulative, na.rm=TRUE))%>%#cumulative cases summed within metro area
    mutate(metro_pop_DSHS=sum(Population_DSHS, na.rm=TRUE))%>%#metro population from county populations
    dplyr::select(Date,Metro_Area, metro_Cases_Daily, metro_Cases_Cumulative, metro_pop_DSHS)%>%#select only variables of interest
    distinct()#keep only distinct rows (drops repeats now that values are metro-level)
    
  #NOTE(review): Level = 'metro' is lowercase while the other levels use
  #'County'/'TSA'/'State' -- confirm downstream consumers expect this
  metro.df<-data.frame(Date=metro.temp$Date,
                       Level = 'metro',
                       Level_Name=metro.temp$Metro_Area,
                       Cases_Daily=metro.temp$metro_Cases_Daily,
                       Cases_Cumulative = metro.temp$metro_Cases_Cumulative,
                       Population_DSHS=metro.temp$metro_pop_DSHS)
  
    
  # drop NA dates and rows before 2020-03-04
  metro.df<-subset(metro.df, !is.na(Date) & Date>=as.Date('2020-03-04'))
  
  ### TSA ###
  TSA.df<-ts.data.clean(mytsa)
  TSA.df<-subset(TSA.df, Date>=as.Date('2020-03-04'))
  
  TSA.df$Level = 'TSA'
  #NOTE(review): positional rename assumes TSA_Name is column 2 -- verify
  colnames(TSA.df)[2] = 'Level_Name'
  ### STATE ###
  
  #create state.df by summing the TSA data by date
  #at the state level we want daily new cases, and population variables from DSHS data
  state.temp<-TSA.df%>%
    group_by(Date)%>%
    mutate(state_Cases_Daily=sum(Cases_Daily, na.rm=TRUE))%>%#statewide daily cases
    mutate(state_Cases_Cumulative=sum(Cases_Cumulative, na.rm=TRUE))%>%#statewide cumulative cases
    mutate(state_pop_DSHS=sum(Population_DSHS, na.rm=TRUE))%>%#statewide population
    dplyr::select(Date,state_Cases_Daily, state_Cases_Cumulative, state_pop_DSHS)%>%#select only variables of interest
    distinct()#keep only distinct rows (drops repeats now that values are state-level)
    
  state.df<-data.frame(Date=state.temp$Date,
                       Level = 'State',
                       Level_Name = 'Texas',
                       Cases_Daily=state.temp$state_Cases_Daily,
                       Cases_Cumulative=state.temp$state_Cases_Cumulative, 
                       Population_DSHS=state.temp$state_pop_DSHS)
  
  state.df<- subset(state.df, Date>=as.Date('2020-03-04'))
  
  #return a list of the four dataframes generated
  return(list(county=county.df, TSA=TSA.df, state=state.df, metro=metro.df))
}
library(forecast)
## Warning: package 'forecast' was built under R version 3.6.3
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
## 
##     getResponse
library(ggplot2) 

library(astsa)
## Warning: package 'astsa' was built under R version 3.6.3
## 
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
## 
##     gas
library(forecast)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)
library(fpp2)
## Warning: package 'fpp2' was built under R version 3.6.3
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.6.3
## 
## Attaching package: 'fma'
## The following objects are masked from 'package:astsa':
## 
##     chicken, sales
## The following objects are masked from 'package:MASS':
## 
##     cement, housing, petrol
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.6.3
## 
## Attaching package: 'fpp2'
## The following object is masked from 'package:astsa':
## 
##     oil
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
#covid.arima.forecast: fit an auto-selected ARIMA (d=0) to a geography's
#daily-case series and forecast prediction.period days ahead.
#mydata must contain Date, Cases_Daily, Level and Level_Name columns
#(Level/Level_Name are used only to name the saved diagnostic plot).
#Returns a data frame with Date, Cases_Daily (observed values followed by the
#forecast mean) and 95% CI bounds (NA over the observed span).
covid.arima.forecast<-function(mydata)
{
  maxdate<-max(mydata$Date)
  mindate <-min(mydata$Date)
  prediction.period<-10 #forecast horizon in days; change as desired
  
  #BUG FIX: the condition was max(mydata$Cases_Daily>=5, na.rm=TRUE), i.e. the
  #max of a logical vector, which also evaluated truthy (via -Inf) when every
  #value was NA. Compare the maximum count itself against the threshold.
  if(max(mydata$Cases_Daily, na.rm = TRUE) >= 5)
  {
    
    #plain daily-case series (ts start/frequency left at defaults)
    my.timeseries<-ts(mydata$Cases_Daily)
    
    #d=0 restricts first differencing to 0 so that daily cases aren't differenced
    #get a fit model using the best ARIMA from auto.arima
    arima.fit<-forecast::auto.arima(my.timeseries, d=0)
    
    #forecast prediction.period days past the end of the observed series
    arima.forecast<-forecast::forecast(arima.fit, h = prediction.period) 
    
    #diagnostics: ACF/PACF plots saved per level/level-name
    acf<-ggAcf(my.timeseries, lag.max=30)
    pacf<-ggPacf(my.timeseries, lag.max=30)
    grid.arrange(acf, pacf, nrow=2)
    ggsave(paste0('statistical-output/time-series/diagnostics/', mydata$Level[1],'/', mydata$Level_Name[1], '.png'))
     
    #observed series followed by the forecast mean; the 95% interval (column 2
    #of lower/upper) is attached only over the forecast span
    #(hard-coded 10s replaced with prediction.period so the horizon is set in one place)
    arima.out<-data.frame(Date = seq(mindate, maxdate + prediction.period, by = 'days'),
                          Cases_Daily = c(my.timeseries, arima.forecast[['mean']]),
                          CI_Lower = c(rep(NA, times = length(my.timeseries)), arima.forecast[['lower']][, 2]),
                          CI_Upper = c(rep(NA, times = length(my.timeseries)), arima.forecast[['upper']][, 2]))

    } else {
      
    #too few cases to model reliably: pad the observed series with NA forecasts
    arima.out<-data.frame(Date = seq(mindate, maxdate + prediction.period, by = 'days'),
                          Cases_Daily = c(mydata$Cases_Daily, rep(NA, times = prediction.period)),
                          CI_Lower = rep(NA, times = length(mydata$Date) + prediction.period),
                          CI_Upper =  rep(NA, times = length(mydata$Date) + prediction.period)
                          )
  }
  return(arima.out)
}

County

#get county forecasts using covid.arima.forecast function

#load nlme package
library(nlme)
#apply the covid.arima.forecast function to the county.Daily dataframe by county
#note that the output will be a list of dataframes, one per Level_Name
#NOTE(review): county.Daily is assumed to be the county element from
#ts.data.organize() -- confirm (it is created in a chunk not shown here)
county.arima.output<-nlme::gapply(county.Daily, FUN = covid.arima.forecast, groups=county.Daily$Level_Name)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

#load data.table package
library(data.table)
#bind the per-county forecast list into one data frame; idcol=TRUE stores each
#list element's name (the county) in a leading ".id" column
county.arima.df<-data.table::rbindlist(county.arima.output, idcol=TRUE)

#rename the id column (position 1) to County
colnames(county.arima.df)[1] = 'County'

Metro

#get metro forecasts using covid.arima.forecast function
#load nlme package
library(nlme)
#apply the covid.arima.forecast function to the metro.Daily dataframe by metro
#note that the output will be a list of dataframes, one per Level_Name
metro.arima.output<-nlme::gapply(metro.Daily, FUN = covid.arima.forecast, groups=metro.Daily$Level_Name)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

#load data.table package
library(data.table)
#bind the per-metro forecast list into one data frame; the list names (metro
#areas) go into a leading ".id" column
metro.arima.df<-data.table::rbindlist(metro.arima.output, idcol=TRUE)

#rename the id column (position 1) to Metro_Area
colnames(metro.arima.df)[1] = 'Metro_Area'

TSA

#get TSA forecasts; load nlme package (comment was garbled: "#ge#load")
library(nlme)
#apply the covid.arima.forecast function to the TSA.Daily dataframe by TSA
#note that the output will be a list of dataframes, one per Level_Name
TSA.arima.output<-nlme::gapply(TSA.Daily, FUN = covid.arima.forecast, groups=TSA.Daily$Level_Name)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

#load data.table package
library(data.table)
#bind the per-TSA forecast list into one data frame; list names go into ".id"
TSA.arima.df<-data.table::rbindlist(TSA.arima.output, idcol=TRUE)


#rename the id column (position 1) to TSA
colnames(TSA.arima.df)[1] = 'TSA'
#attach the TSA display name by joining on the TSA code
#NOTE(review): this uses TSA.daily (lowercase d) while the forecasts use
#TSA.Daily -- confirm both objects exist and agree
TSA.arima.df = merge(TSA.arima.df, unique(TSA.daily[, c('TSA', 'TSA_Name')]), by = c('TSA'))
#get Texas forecasts using covid.arima.forecast function
texas.arima.df<-covid.arima.forecast(state.Daily)
## Saving 7 x 5 in image

#export arima results into .csv files
write.csv(texas.arima.df, 'statistical-output/time-series/state_timeseries.csv', row.names = F)
write.csv(TSA.arima.df, 'statistical-output/time-series/tsa_timeseries.csv', row.names = F)
write.csv(county.arima.df, 'statistical-output/time-series/county_timeseries.csv', row.names = F)
write.csv(metro.arima.df, 'statistical-output/time-series/metro_timeseries.csv', row.names = F)

STANDARD STATISTICAL TESTS

Ratio of cases

TSA level

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following object is masked from 'package:base':
## 
##     date
#week-over-week ratio of new cases at the TSA level:
#week2 = cases in the most recent 7 days, week1 = the 7 days before that
cumcases.TSA <- TSA.Daily %>% dplyr::select(Date,Level_Name, Cases_Cumulative)
cumcases.TSA$Date <- ymd(cumcases.TSA$Date)

latestdate <- max(cumcases.TSA$Date)
earliestdate <- latestdate - 14
middate <- latestdate-7

#new cases over the most recent week (difference of cumulative counts)
#NOTE(review): the row-wise subtraction assumes TSA rows appear in the same
#order on each date -- verify the data is sorted consistently
week2 <- subset(cumcases.TSA, Date==latestdate, select=Cases_Cumulative) - 
  subset(cumcases.TSA, Date==middate, select=Cases_Cumulative)

#new cases over the week before that
week1 <- subset(cumcases.TSA, Date==middate, select=Cases_Cumulative) - 
  subset(cumcases.TSA, Date==earliestdate, select=Cases_Cumulative)

#ratio > 1 means cases are accelerating; negative ratios (data corrections)
#are suppressed
#NOTE(review): unlike the county version, the week1<10 small-denominator
#filter is commented out here -- confirm that is intentional
current_ratio <- (week2/week1)[,1]
#current_ratio[week1<10] <- NA
current_ratio[current_ratio<0] <- NA
#NOTE(review): cut() breaks end at max(current_ratio); this errors if the max
#is NA or <= 8 (non-increasing breaks) -- consider guarding
tsa_current_ratio_dat <- data.frame(TSA=subset(cumcases.TSA, Date==latestdate, select=Level_Name)[,1],
                                    current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))

# add TSA name by joining on the TSA code
tsa_current_ratio_dat = merge(tsa_current_ratio_dat, unique(TSA.daily[, c('TSA', 'TSA_Name')]), by = 'TSA')

County level

#week-over-week ratio of new cases at the county level
#(week2 = most recent 7 days, week1 = the 7 days before)
cumcases.county <- county.Daily %>% dplyr::select(Date,Level_Name, Cases_Cumulative)
cumcases.county$Date <- ymd(cumcases.county$Date)

latestdate <- max(cumcases.county$Date, na.rm=T)
earliestdate <- latestdate - 14
middate <- latestdate-7

#new cases in each of the two most recent weeks (cumulative differences);
#assumes county rows are ordered identically on each date -- verify
week2 <- subset(cumcases.county, Date==latestdate, select=Cases_Cumulative) - 
  subset(cumcases.county, Date==middate, select=Cases_Cumulative)
week1 <- subset(cumcases.county, Date==middate, select=Cases_Cumulative) - 
  subset(cumcases.county, Date==earliestdate, select=Cases_Cumulative)

#suppress ratios with small denominators (<10 cases/week) or negative values
current_ratio <- (week2/week1)[,1]
current_ratio[week1<10] <- NA
current_ratio[current_ratio<0] <- NA
#NOTE(review): cut() errors if max(current_ratio) is NA or <= 8 -- consider guarding
county_current_ratio_dat <- data.frame(County=subset(cumcases.county, Date==latestdate, select=Level_Name)[,1], current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))

Metropolitan level

#week-over-week ratio of new cases at the metro level
#(week2 = most recent 7 days, week1 = the 7 days before)
cumcases.metro <- metro.Daily %>% dplyr::select(Date, Level_Name, Cases_Cumulative)
cumcases.metro$Date <- ymd(cumcases.metro$Date)

latestdate <- max(cumcases.metro$Date, na.rm=T)
earliestdate <- latestdate - 14
middate <- latestdate-7

#new cases in each of the two most recent weeks (cumulative differences)
week2 <- subset(cumcases.metro, Date==latestdate, select=Cases_Cumulative) - 
  subset(cumcases.metro, Date==middate, select=Cases_Cumulative)
week1 <- subset(cumcases.metro, Date==middate, select=Cases_Cumulative) - 
  subset(cumcases.metro, Date==earliestdate, select=Cases_Cumulative)

#suppress negative ratios (data corrections)
#NOTE(review): no week1<10 filter here, unlike the county version -- confirm
current_ratio <- (week2/week1)[,1]
current_ratio[current_ratio<0] <- NA
#NOTE(review): cut() errors if max(current_ratio) is NA or <= 8 -- consider guarding
metro_current_ratio_dat <- data.frame(Metro_Area=subset(cumcases.metro, Date==latestdate, select=Level_Name)[,1], current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))

Texas level

#week-over-week ratio of new cases for the whole state
#(week2 = most recent 7 days, week1 = the 7 days before)
cumcases.state <- state.Daily %>% dplyr::select(Date, Cases_Cumulative)
cumcases.state$Date <- ymd(cumcases.state$Date)

latestdate <- max(cumcases.state$Date, na.rm=T)
earliestdate <- latestdate - 14
middate <- latestdate-7

#new cases in each of the two most recent weeks (cumulative differences)
week2 <- subset(cumcases.state, Date==latestdate, select=Cases_Cumulative) - 
  subset(cumcases.state, Date==middate, select=Cases_Cumulative)
week1 <- subset(cumcases.state, Date==middate, select=Cases_Cumulative) - 
  subset(cumcases.state, Date==earliestdate, select=Cases_Cumulative)

#suppress negative ratios (data corrections)
current_ratio <- (week2/week1)[,1]
current_ratio[current_ratio<0] <- NA
#NOTE(review): cut() errors if max(current_ratio) is NA or <= 8; with a single
#state-level value the max equals the value itself -- consider fixed breaks
state_current_ratio_dat <- data.frame(current_ratio=current_ratio, current_ratio_cat = cut(current_ratio, breaks=c(0,0.5,.9,1.1,1.5,2,4,8,max(current_ratio))))

export

#export case-ratio results into .csv files
#(previous comment said "arima results" -- these are the week-over-week ratios)
write.csv(state_current_ratio_dat, 'statistical-output/standard-stats/case-ratios/state_case_ratio.csv', row.names = F)
write.csv(tsa_current_ratio_dat,'statistical-output/standard-stats/case-ratios/tsa_case_ratio.csv', row.names = F)
write.csv(county_current_ratio_dat, 'statistical-output/standard-stats/case-ratios/county_case_ratio.csv', row.names = F)
write.csv(metro_current_ratio_dat,'statistical-output/standard-stats/case-ratios/metro_case_ratio.csv', row.names = F)

rolling average

Omitted for now; Dr. Yaseen is processing this in Tableau

MACHINE LEARNING

TODO: save output